
---
output:
  pdf_document:
    citation_package: natbib
    fig_caption: yes
    keep_tex: yes
    latex_engine: xelatex
    template: header.tex
---

```{r,include=FALSE}

library(knitr)
knitr::opts_chunk$set(eval = TRUE, echo = FALSE, message=FALSE, warning=FALSE)

library(matlib)
library(DescTools)
library(survey)
library(dplyr)
library(ggplot2)
library(ggthemes)
library(gridExtra)
library(ggthemes)
library(plotrix)
library(plyr)
library(coefplot)
library(questionr)
library(psych)
library(snakecase)
library(questionr)
library(coefplot)
library(stargazer)
library(texreg)
library(tidyr)
library(forcats)
library(stringr)
library(kableExtra)
library(rlang)
library(naniar)

#devtools::install_github("crsh/papaja")
library(papaja)


load("CleanBrexit.RData")
load("CleanParties.RData")


```

```{r,include=FALSE}

#Analyzing the presence of egotistic bias (guesses by previous vote of respondent)

Means.Guess.by.Vote.Con<-lm(profile_Con~respondent_pastvote_2017, data=Parties.Experiment.Clean,  w=Parties.Experiment.Clean$respondent_weights)
new.dat<-data.frame(respondent_pastvote_2017=factor(c("Conservative","Labour", "Liberal Democrat",
 "Scottish National Party (SNP)", "Plaid Cymru","UK Independence Party (UKIP)","Green")))
Means.Guess.by.Vote.Con1<-data.frame(new.dat, predict(Means.Guess.by.Vote.Con, newdata = new.dat, interval = 'confidence'))

Means.Guess.by.Vote.Con1$respondent_pastvote_2017<-factor(Means.Guess.by.Vote.Con1$respondent_pastvote_2017, levels=Means.Guess.by.Vote.Con1$respondent_pastvote_2017[order(Means.Guess.by.Vote.Con1$fit)])

Means.Guess.by.Vote.Leave<-lm(profile_Leave~respondent_pastvote_EURef, data=Brexit.Experiment.Clean,  w=Brexit.Experiment.Clean$respondent_weights)
new.dat<-data.frame(respondent_pastvote_EURef=factor(c("Remain", "Leave",  "Did not vote")))
Means.Guess.by.Vote.Leave1<-data.frame(new.dat, predict(Means.Guess.by.Vote.Leave, newdata = new.dat, interval = 'confidence'))
Means.Guess.by.Vote.Leave1$respondent_pastvote_EURef<-factor(Means.Guess.by.Vote.Leave1$respondent_pastvote_EURef, levels=Means.Guess.by.Vote.Leave1$respondent_pastvote_EURef[order(Means.Guess.by.Vote.Leave1$fit)])

Means.Guess.by.Vote.Con1 <- Means.Guess.by.Vote.Con1 %>%
  mutate(respondent_pastvote_2017 =
                  recode(respondent_pastvote_2017,
         "Scottish National Party (SNP)" = "Scottish National\nParty (SNP)",  "UK Independence Party (UKIP)" = "UK Independence\nParty (UKIP)"))

guess.by.Brexit.vote<-ggplot(Means.Guess.by.Vote.Leave1, aes(x=respondent_pastvote_EURef, y=fit)) + 
  geom_errorbar(aes(ymin=lwr, ymax=upr), width=.1, position= position_dodge(w=0.5)) +
  geom_point(position= position_dodge(w=0.5), shape=19, size=2, color="red", alpha=0.7) +
  geom_hline(yintercept=51.9, linetype="dashed", 
             size=1, color="blue", alpha=0.5)+
  ylim(0, 100) +
  theme_clean()+ 
  ggtitle("Leave Vote Guess by Previous EU Referendum Vote") + theme(plot.title = element_text(hjust = 0.5)) +
  xlab("") + ylab("")+ theme(legend.position="bottom", axis.text=element_text(size=10))+
  coord_flip()

guess.by.party.vote<-ggplot(Means.Guess.by.Vote.Con1, aes(x=respondent_pastvote_2017, y=fit)) + 
  geom_errorbar(aes(ymin=lwr, ymax=upr), width=.1, position= position_dodge(w=0.5)) +
  geom_point(position= position_dodge(w=0.5), shape=19, size=2, color="red", alpha=0.7) +
  geom_hline(yintercept=51.4, linetype="dashed", 
             size=1, color="blue", alpha=0.5)+
  ylim(0, 100) +
  theme_clean()+ 
  ggtitle("Conservative Vote Guess by Previous General Election Vote") + theme(plot.title = element_text(hjust = 0.5)) +
  xlab("") + ylab("")+ theme(legend.position="bottom", axis.text=element_text(size=10))+
  coord_flip()

```
```{r,include=FALSE}

#The following chunk allows for comparisons of mean guess by profile attribute with the same mean in the BES

# Predicting the (weighted) mean guess by level of each attribute

MeansAge<-lm(profile_Leave~profile_AgeGroup, data=Brexit.Experiment.Clean,  w=Brexit.Experiment.Clean$respondent_weights)
levels(Brexit.Experiment.Clean$profile_AgeGroup)
new.dat <- data.frame(profile_AgeGroup=c("18-29", "30-44", "45-59", "60-74", "75 +"))
MeansAge1<-data.frame(new.dat, predict(MeansAge, newdata = new.dat, interval = 'confidence'))
MeansAgeBES<-lm(profile_Leave~profile_AgeGroup, data=Brexit.BES.Clean,  w=Brexit.BES.Clean$profile_weights)
MeansAgeBES2<-data.frame(new.dat, predict(MeansAgeBES, newdata = new.dat, interval = 'confidence'))

MeansAgeBES2$Probabilities<-"BES"
MeansAge1$Probabilities<-"Guess"
MeansAge.Bind<-bind_rows(MeansAge1, MeansAgeBES2)

MeansReligion<-lm(profile_Leave~profile_Religion, data=Brexit.Experiment.Clean,  w=Brexit.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_Religion=c("Roman Catholic","Church of England/Anglican/Episcopal",
                                 "Hindu","Islam",
                                 "Methodist","Presbyterian/Church of Scotland",
                                 "No religion","Unknown" ))
MeansReligion1<-data.frame(new.dat, predict(MeansReligion, newdata = new.dat, interval = 'confidence'))
MeansReligion1$profile_Religion<-factor(MeansReligion1$profile_Religion, levels=MeansReligion1$profile_Religion[order(MeansReligion1$fit)])
MeansReligionBES<-lm(profile_Leave~profile_Religion, data=Brexit.BES.Clean,  w=Brexit.BES.Clean$profile_weights)
MeansReligionBES2<-data.frame(new.dat, predict(MeansReligionBES, newdata = new.dat, interval = 'confidence'))

MeansReligionBES2$Probabilities<-"BES"
MeansReligion1$Probabilities<-"Guess"
MeansReligion.Bind<-bind_rows(MeansReligion1, MeansReligionBES2)


MeansEthnicity<-lm(profile_Leave~profile_Ethnicity, data=Brexit.Experiment.Clean,  w=Brexit.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_Ethnicity=c("Asian","Black",                            
                                  "Mixed","White",                            
                                  "Unknown"))
MeansEthnicity1<-data.frame(new.dat, predict(MeansEthnicity, newdata = new.dat, interval = 'confidence'))
MeansEthnicity1$profile_Ethnicity<-factor(MeansEthnicity1$profile_Ethnicity, levels=MeansEthnicity1$profile_Ethnicity[order(MeansEthnicity1$fit)])
MeansEthnicityBES<-lm(profile_Leave~profile_Ethnicity, data=Brexit.BES.Clean,  w=Brexit.BES.Clean$profile_weights)
MeansEthnicityBES2<-data.frame(new.dat, predict(MeansEthnicityBES, newdata = new.dat, interval = 'confidence'))


MeansEthnicityBES2$Probabilities<-"BES"
MeansEthnicity1$Probabilities<-"Guess"
MeansEthnicity.Bind<-bind_rows(MeansEthnicity1, MeansEthnicityBES2)


MeansGender<-lm(profile_Leave~profile_Gender, data=Brexit.Experiment.Clean,  w=Brexit.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_Gender=c("Male", "Female"))
MeansGender1<-data.frame(new.dat, predict(MeansGender, newdata = new.dat, interval = 'confidence'))
MeansGenderBES<-lm(profile_Leave~profile_Gender, data=Brexit.BES.Clean,  w=Brexit.BES.Clean$profile_weights)
MeansGenderBES2<-data.frame(new.dat, predict(MeansGenderBES, newdata = new.dat, interval = 'confidence'))

MeansGenderBES2$Probabilities<-"BES"
MeansGender1$Probabilities<-"Guess"
MeansGender.Bind<-bind_rows(MeansGender1, MeansGenderBES2)

MeansEducation2<-lm(profile_Leave~profile_Education, data=Brexit.Experiment.Clean,  w=Brexit.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_Education=c('Holds a university degree', 'Does not hold a university degree'))
MeansEducation3<-data.frame(new.dat, predict(MeansEducation2, newdata = new.dat, interval = 'confidence'))
MeansEducationBES<-lm(profile_Leave~profile_Education, data=Brexit.BES.Clean,  w=Brexit.BES.Clean$profile_weights)
MeansEducationBES2<-data.frame(new.dat, predict(MeansEducationBES, newdata = new.dat, interval = 'confidence'))

MeansEducationBES2$Probabilities<-"BES"
MeansEducation3$Probabilities<-"Guess"
MeansEducation.Bind<-bind_rows(MeansEducation3, MeansEducationBES2)

MeansRegion<-lm(profile_Leave~profile_Region, data=Brexit.Experiment.Clean,  w=Brexit.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_Region=factor(c("London", "North East",
                                              "North West","Scotland",
                                              "South East","South West",
                                              "the East Midlands","the East of England",
                                              "Wales","West Midlands",
                                              "Yorkshire & Humber")))
MeansRegion1<-data.frame(new.dat, predict(MeansRegion, newdata = new.dat, interval = 'confidence'))
MeansRegion1$profile_Region<-factor(MeansRegion1$profile_Region, levels=MeansRegion1$profile_Region[order(MeansRegion1$fit)])
MeansRegionBES<-lm(profile_Leave~profile_Region, data=Brexit.BES.Clean,  w=Brexit.BES.Clean$profile_weights)
MeansRegionBES2<-data.frame(new.dat, predict(MeansRegionBES, newdata = new.dat, interval = 'confidence'))
MeansRegionBES2$Probabilities<-"BES"
MeansRegion1$Probabilities<-"Guess"
MeansRegion.Bind<-bind_rows(MeansRegion1, MeansRegionBES2)


MeansRegion.Bind<-bind_rows(MeansRegion1, MeansRegionBES2)
Realvote<-Brexit.Experiment.Clean %>%
dplyr::  group_by(profile_Region) %>%
  dplyr::   summarise(fit = mean(profile_regional_real_Leave, na.rm=T))
Realvote$Probabilities<-"Real"

MeansRegion.Bind2<-bind_rows(MeansRegion.Bind, Realvote)
MeansRegion.Bind2$Probabilities<-factor(MeansRegion.Bind2$Probabilities,
       levels=c("BES"[order(1)],"Real"[order(2)],"Guess"[order(3)]))


MeansIncome<-lm(profile_Leave~profile_AnnualHouseholdIncome, data=Brexit.Experiment.Clean, w=Brexit.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_AnnualHouseholdIncome=factor(c("Less than 5,199"[order(1)],
                                                             "Between 5,200 and 15,599"[order(2)],
                                                             "Between 15,600 and 25,999"[order(3)],
                                                             "Between 26,000 and 36,399"[order(4)],
                                                             "Between 36,400 and 44,999"[order(5)],
                                                             "Between 45,000 and 59,999"[order(6)],
                                                             "Between 60,000 and 99,999"[order(7)],
                                                             "Greater than 100,000"[order(8)] )))

MeansIncome2<-data.frame(new.dat, predict(MeansIncome, newdata = new.dat, interval = 'confidence'))
MeansIncomeBES<-lm(profile_Leave~profile_AnnualHouseholdIncome, data=Brexit.BES.Clean,  w=Brexit.BES.Clean$profile_weights)
MeansIncomeBES2<-data.frame(new.dat, predict(MeansIncomeBES, newdata = new.dat, interval = 'confidence'))

MeansIncomeBES2$Probabilities<-"BES"
MeansIncome2$Probabilities<-"Guess"
MeansIncome.Bind<-bind_rows(MeansIncome2, MeansIncomeBES2)

MeansIncome.Bind$profile_AnnualHouseholdIncome<-factor(MeansIncome.Bind$profile_AnnualHouseholdIncome,
                                                       levels = c("Less than 5,199"[order(1)],
                                                             "Between 5,200 and 15,599"[order(2)],
                                                             "Between 15,600 and 25,999"[order(3)],
                                                             "Between 26,000 and 36,399"[order(4)],
                                                             "Between 36,400 and 44,999"[order(5)],
                                                             "Between 45,000 and 59,999"[order(6)],
                                                             "Between 60,000 and 99,999"[order(7)],
                                                             "Greater than 100,000"[order(8)] ))

MeansClass<-lm(profile_Leave~profile_SubClass, data=Brexit.Experiment.Clean,  w=Brexit.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_SubClass=c("Working class", "Middle class"))
MeansClass1<-data.frame(new.dat, predict(MeansClass, newdata = new.dat, interval = 'confidence'))
MeansClassBES<-lm(profile_Leave~profile_SubClass, data=Brexit.BES.Clean,  w=Brexit.BES.Clean$profile_weights)
MeansClassBES2<-data.frame(new.dat, predict(MeansClassBES, newdata = new.dat, interval = 'confidence'))

MeansClassBES2$Probabilities<-"BES"
MeansClass1$Probabilities<-"Guess"
MeansClass.Bind<-bind_rows(MeansClass1, MeansClassBES2)

MeansFamClass<-lm(profile_Leave~profile_SubFamClass, data=Brexit.Experiment.Clean,  w=Brexit.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_SubFamClass=c("Family working class", "Family middle class"))
MeansFamClass1<-data.frame(new.dat, predict(MeansFamClass, newdata = new.dat, interval = 'confidence'))
MeansFamClassBES<-lm(profile_Leave~profile_SubFamClass, data=Brexit.BES.Clean,  w=Brexit.BES.Clean$profile_weights)
MeansFamClassBES2<-data.frame(new.dat, predict(MeansFamClassBES, newdata = new.dat, interval = 'confidence'))

MeansFamClassBES2$Probabilities<-"BES"
MeansFamClass1$Probabilities<-"Guess"
MeansFamClass.Bind<-bind_rows(MeansFamClass1, MeansFamClassBES2)


MeansHomeStatus<-lm(profile_Leave~profile_HomeStatus, data=Brexit.Experiment.Clean,  w=Brexit.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_HomeStatus=c("Rents home", "Owns home"))
MeansHomeStatus1<-data.frame(new.dat, predict(MeansHomeStatus, newdata = new.dat, interval = 'confidence'))
MeansHomeStatusBES<-lm(profile_Leave~profile_HomeStatus, data=Brexit.BES.Clean,  w=Brexit.BES.Clean$profile_weights)
MeansHomeStatusBES2<-data.frame(new.dat, predict(MeansHomeStatusBES, newdata = new.dat, interval = 'confidence'))

MeansHomeStatusBES2$Probabilities<-"BES"
MeansHomeStatus1$Probabilities<-"Guess"
MeansHomeStatus.Bind<-bind_rows(MeansHomeStatus1, MeansHomeStatusBES2)


#Parties

MeansAge<-lm(profile_Con~profile_AgeGroup, data=Parties.Experiment.Clean,  w=Parties.Experiment.Clean$respondent_weights)
levels(Parties.Experiment.Clean$profile_AgeGroup)
new.dat <- data.frame(profile_AgeGroup=c("18-29", "30-44", "45-59", "60-74", "75 +"))
MeansAge1<-data.frame(new.dat, predict(MeansAge, newdata = new.dat, interval = 'confidence'))
MeansAgeBES<-lm(profile_Con~profile_AgeGroup, data=Parties.BES.Clean,  w=Parties.BES.Clean$profile_weights)
MeansAgeBES2<-data.frame(new.dat, predict(MeansAgeBES, newdata = new.dat, interval = 'confidence'))

MeansAgeBES2$Probabilities<-"BES"
MeansAge1$Probabilities<-"Guess"
MeansAgeP.Bind<-bind_rows(MeansAge1, MeansAgeBES2)

MeansEducationP<-lm(profile_Con~profile_Education, data=Parties.Experiment.Clean,  w=Parties.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_Education=c('Holds a university degree', 'Does not hold a university degree'))
MeansEducationP2<-data.frame(new.dat, predict(MeansEducationP, newdata = new.dat, interval = 'confidence'))
MeansEducationPBES<-lm(profile_Con~profile_Education, data=Parties.BES.Clean,  w=Parties.BES.Clean$profile_weights)
MeansEducationPBES2<-data.frame(new.dat, predict(MeansEducationPBES, newdata = new.dat, interval = 'confidence'))

MeansEducationPBES2$Probabilities<-"BES"
MeansEducationP2$Probabilities<-"Guess"
MeansEducationP.Bind<-bind_rows(MeansEducationP2, MeansEducationPBES2)


MeansIncomeP<-lm(profile_Con~profile_AnnualHouseholdIncome, data=Parties.Experiment.Clean,  w=Parties.Experiment.Clean$respondent_weights)
new.dat <- data.frame((profile_AnnualHouseholdIncome=factor(c("Less than 5,199"[order(1)],
                                                             "Between 5,200 and 15,599"[order(2)],
                                                             "Between 15,600 and 25,999"[order(3)],
                                                             "Between 26,000 and 36,399"[order(4)],
                                                             "Between 36,400 and 44,999"[order(5)],
                                                             "Between 45,000 and 59,999"[order(6)],
                                                             "Between 60,000 and 99,999"[order(7)],
                                                             "Greater than 100,000"[order(8)]))))

MeansIncomeP2<-data.frame(new.dat, predict(MeansIncomeP, newdata = new.dat, interval = 'confidence'))
MeansIncomePBES<-lm(profile_Con~profile_AnnualHouseholdIncome, data=Parties.BES.Clean,  w=Parties.BES.Clean$profile_weights)
MeansIncomePBES2<-data.frame(new.dat, predict(MeansIncomePBES, newdata = new.dat, interval = 'confidence'))


MeansIncomePBES2$Probabilities<-"BES"
MeansIncomeP2$Probabilities<-"Guess"
MeansIncomeP.Bind<-bind_rows(MeansIncomeP2, MeansIncomePBES2)

MeansIncomeP.Bind$profile_AnnualHouseholdIncome<-factor(MeansIncome.Bind$profile_AnnualHouseholdIncome,
                                                       levels = c("Less than 5,199"[order(1)],
                                                             "Between 5,200 and 15,599"[order(2)],
                                                             "Between 15,600 and 25,999"[order(3)],
                                                             "Between 26,000 and 36,399"[order(4)],
                                                             "Between 36,400 and 44,999"[order(5)],
                                                             "Between 45,000 and 59,999"[order(6)],
                                                             "Between 60,000 and 99,999"[order(7)],
                                                             "Greater than 100,000"[order(8)] ))



MeansRegionP<-lm(profile_Con~profile_Region, data=Parties.Experiment.Clean,  w=Parties.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_Region=factor(c("London", "North East",
                                              "North West","Scotland",
                                              "South East","South West",
                                              "the East Midlands","the East of England",
                                              "Wales","West Midlands",
                                              "Yorkshire & Humber")))
MeansRegionP1<-data.frame(new.dat, predict(MeansRegionP, newdata = new.dat, interval = 'confidence'))
MeansRegionP1$profile_Region<-factor(MeansRegionP1$profile_Region, levels=MeansRegionP1$profile_Region[order(MeansRegionP1$fit)])
MeansRegionPBES<-lm(profile_Con~profile_Region, data=Parties.BES.Clean,  w=Parties.BES.Clean$profile_weights)
MeansRegionPBES2<-data.frame(new.dat, predict(MeansRegionPBES, newdata = new.dat, interval = 'confidence'))


MeansRegionPBES2$Probabilities<-"BES"
MeansRegionP1$Probabilities<-"Guess"
MeansRegionP.Bind<-bind_rows(MeansRegionP1, MeansRegionPBES2)
Realvote<-Parties.Experiment.Clean %>%
dplyr::  group_by(profile_Region) %>%
  dplyr::   summarise(fit = mean(profile_regional_real_Con, na.rm=T))
Realvote$Probabilities<-"Real"

MeansRegionP2.Bind<-bind_rows(MeansRegionP.Bind, Realvote)
MeansRegionP2.Bind$Probabilities<-factor(MeansRegion.Bind2$Probabilities,
       levels=c("BES"[order(1)],"Real"[order(2)],"Guess"[order(3)]))


MeansClassP<-lm(profile_Con~profile_SubClass, data=Parties.Experiment.Clean,  w=Parties.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_SubClass=c("Working class", "Middle class"))
MeansClassP1<-data.frame(new.dat, predict(MeansClassP, newdata = new.dat, interval = 'confidence'))
MeansClassPBES<-lm(profile_Con~profile_SubClass, data=Parties.BES.Clean,  w=Parties.BES.Clean$profile_weights)
MeansClassPBES2<-data.frame(new.dat, predict(MeansClassPBES, newdata = new.dat, interval = 'confidence'))

MeansClassPBES2$Probabilities<-"BES"
MeansClassP1$Probabilities<-"Guess"
MeansClassP.Bind<-bind_rows(MeansClassP1, MeansClassPBES2)


MeansFamClassP<-lm(profile_Con~profile_SubFamClass, data=Parties.Experiment.Clean,  w=Parties.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_SubFamClass=c("Family working class", "Family middle class"))
MeansFamClassP1<-data.frame(new.dat, predict(MeansFamClassP, newdata = new.dat, interval = 'confidence'))
MeansFamClassPBES<-lm(profile_Con~profile_SubFamClass, data=Parties.BES.Clean,  w=Parties.BES.Clean$profile_weights)
MeansFamClassPBES2<-data.frame(new.dat, predict(MeansFamClassPBES, newdata = new.dat, interval = 'confidence'))

MeansFamClassPBES2$Probabilities<-"BES"
MeansFamClassP1$Probabilities<-"Guess"
MeansFamClassP.Bind<-bind_rows(MeansFamClassP1, MeansFamClassPBES2)

MeansHomeStatusP<-lm(profile_Con~profile_HomeStatus, data=Parties.Experiment.Clean,  w=Parties.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_HomeStatus=c("Rents home", "Owns home"))
MeansHomeStatusP1<-data.frame(new.dat, predict(MeansHomeStatusP, newdata = new.dat, interval = 'confidence'))
MeansHomeStatusPBES<-lm(profile_Con~profile_HomeStatus, data=Parties.BES.Clean,  w=Parties.BES.Clean$profile_weights)
MeansHomeStatusPBES2<-data.frame(new.dat, predict(MeansHomeStatusPBES, newdata = new.dat, interval = 'confidence'))

MeansHomeStatusPBES2$Probabilities<-"BES"
MeansHomeStatusP1$Probabilities<-"Guess"
MeansHomeStatusP.Bind<-bind_rows(MeansHomeStatusP1, MeansHomeStatusPBES2)


MeansReligionP<-lm(profile_Con~profile_Religion, data=Parties.Experiment.Clean,  w=Parties.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_Religion=c("Roman Catholic","Church of England/Anglican/Episcopal",
                                 "Hindu","Islam",
                                 "Methodist","Presbyterian/Church of Scotland",
                                 "No religion","Unknown" ))
MeansReligionP1<-data.frame(new.dat, predict(MeansReligionP, newdata = new.dat, interval = 'confidence'))
MeansReligionP1$profile_Religion<-factor(MeansReligionP1$profile_Religion, levels=MeansReligionP1$profile_Religion[order(MeansReligionP1$fit)])
MeansReligionPBES<-lm(profile_Con~profile_Religion, data=Parties.BES.Clean,  w=Parties.BES.Clean$profile_weights)
MeansReligionPBES2<-data.frame(new.dat, predict(MeansReligionPBES, newdata = new.dat, interval = 'confidence'))

MeansReligionPBES2$Probabilities<-"BES"
MeansReligionP1$Probabilities<-"Guess"
MeansReligionP.Bind<-bind_rows(MeansReligionP1, MeansReligionPBES2)


MeansEthnicityP<-lm(profile_Con~profile_Ethnicity, data=Parties.Experiment.Clean,  w=Parties.Experiment.Clean$respondent_weights)
new.dat<-data.frame(profile_Ethnicity=factor(c("Asian","Black",                            
                                       "Mixed","White",                            
                                       "Unknown")))
MeansEthnicityP1<-data.frame(new.dat, predict(MeansEthnicityP, newdata = new.dat, interval = 'confidence'))
MeansEthnicityP1$profile_Ethnicity<-factor(MeansEthnicityP1$profile_Ethnicity, levels=MeansEthnicityP1$profile_Ethnicity[order(MeansEthnicityP1$fit)])
MeansEthnicityPBES<-lm(profile_Con~profile_Ethnicity, data=Parties.BES.Clean,  w=Parties.BES.Clean$profile_weights)
MeansEthnicityPBES2<-data.frame(new.dat, predict(MeansEthnicityPBES, newdata = new.dat, interval = 'confidence'))

MeansEthnicityPBES2$Probabilities<-"BES"
MeansEthnicityP1$Probabilities<-"Guess"
MeansEthnicityP.Bind<-bind_rows(MeansEthnicityP1, MeansEthnicityPBES2)


MeansGenderP<-lm(profile_Con~profile_Gender, data=Parties.Experiment.Clean,  w=Parties.Experiment.Clean$respondent_weights)
new.dat <- data.frame(profile_Gender=c("Male", "Female"))
MeansGenderP1<-data.frame(new.dat, predict(MeansGenderP, newdata = new.dat, interval = 'confidence'))
MeansGenderPBES<-lm(profile_Con~profile_Gender, data=Parties.BES.Clean,  w=Parties.BES.Clean$profile_weights)
MeansGenderPBES2<-data.frame(new.dat, predict(MeansGenderPBES, newdata = new.dat, interval = 'confidence'))

MeansGenderPBES2$Probabilities<-"BES"
MeansGenderP1$Probabilities<-"Guess"
MeansGenderP.Bind<-bind_rows(MeansGenderP1, MeansGenderPBES2)


### COMBINE INTO ONE BIG PLOT FOR EACH EXPERIMENT

relabel_for_single_plot <- function(df) {
  df[,1] <- paste0(colnames(df)[1],": ",df[,1])
  colnames(df)[1] <- "Group"
  return(df)
}

# tidy labels
colnames(MeansIncome.Bind)[1] <- "Income"

MeansAll.Bind <- rbind(
relabel_for_single_plot(MeansAge.Bind),  
relabel_for_single_plot(MeansGender.Bind),
relabel_for_single_plot(MeansEthnicity.Bind),
relabel_for_single_plot(MeansEducation.Bind),
relabel_for_single_plot(MeansHomeStatus.Bind),
relabel_for_single_plot(MeansClass.Bind),
relabel_for_single_plot(MeansFamClass.Bind),
relabel_for_single_plot(MeansRegion.Bind2),
relabel_for_single_plot(MeansIncome.Bind),
relabel_for_single_plot(MeansReligion.Bind)
)
MeansAll.Bind$Group<- gsub("profile_", "",MeansAll.Bind$Group)

MeansAll.Bind <- MeansAll.Bind %>%
  tidyr::separate(Group, into = c("Group", "Attribute"), sep=": ")

MeansAll.Bind<- MeansAll.Bind %>%
  mutate(Attribute = fct_reorder(Attribute, fit))
MeansAll.Bind<- MeansAll.Bind %>%
  mutate(Attribute = fct_relevel(Attribute,"Unknown", "Less than 5,199", "Between 5,200 and 15,599","Between 15,600 and 25,999", "Between 26,000 and 36,399", "Between 36,400 and 44,999",  "Between 45,000 and 59,999", "Between 60,000 and 99,999", "Greater than 100,000"))

MeansAll.Bind<- MeansAll.Bind %>%
  mutate(Group = dplyr::recode(Group, HomeStatus = "Home Status", AgeGroup = "Age", SubClass= "Class", SubFamClass = "Family Class"))

P_All_Brex <- ggplot(MeansAll.Bind, aes(x = Attribute, y=fit, col = Probabilities, shape = Probabilities)) + 
  geom_errorbar(aes(ymin=lwr, ymax=upr), width=.1, position= position_dodge(w=0.5)) +
  geom_point(position= position_dodge(w=0.5), size=2) +
  geom_hline(yintercept=50, linetype="dashed", 
             size=1, color="grey", alpha=0.5)+
  theme_classic()+
  ggtitle("Vote Leave by Group") + theme(plot.title = element_text(hjust = 0.5)) +
  xlab("") + ylab("")+
  facet_grid(Group~., scales = "free_y", space = "free_y")+
  theme(legend.position="bottom", strip.text.y =element_text(angle = 0),
        panel.grid.major.y = element_line(colour = "grey95"),
        panel.background = element_rect(colour = "black"))+
  coord_flip(ylim= c(0, 100))+
  scale_color_manual(values = c("dodgerblue4","brown3", "dodgerblue3"))+
  scale_shape_manual(values = c(17, 16, 15))


# tidy labels
colnames(MeansIncomeP.Bind)[1] <- "Income"
MeansIncomeP.Bind<-MeansIncomeP.Bind %>%
  select(everything(), -profile_AnnualHouseholdIncome)

MeansAllP.Bind <- rbind(
relabel_for_single_plot(MeansAgeP.Bind),  
relabel_for_single_plot(MeansGenderP.Bind),
relabel_for_single_plot(MeansEthnicityP.Bind),
relabel_for_single_plot(MeansEducationP.Bind),
relabel_for_single_plot(MeansHomeStatusP.Bind),
relabel_for_single_plot(MeansClassP.Bind),
relabel_for_single_plot(MeansFamClassP.Bind),
relabel_for_single_plot(MeansRegionP2.Bind),
relabel_for_single_plot(MeansIncomeP.Bind),
relabel_for_single_plot(MeansReligionP.Bind)
)

MeansAllP.Bind$Group<- gsub("profile_", "",MeansAllP.Bind$Group)

MeansAllP.Bind <- MeansAllP.Bind %>%
  separate(Group, into = c("Group", "Attribute"), sep=": ")

MeansAllP.Bind<- MeansAllP.Bind %>%
  mutate(Attribute = fct_reorder(Attribute, fit))
MeansAllP.Bind<- MeansAllP.Bind %>%
  mutate(Attribute = fct_relevel(Attribute,"Unknown", "Less than 5,199", "Between 5,200 and 15,599","Between 15,600 and 25,999", "Between 26,000 and 36,399", "Between 36,400 and 44,999",  "Between 45,000 and 59,999", "Between 60,000 and 99,999", "Greater than 100,000"))

MeansAllP.Bind<- MeansAllP.Bind %>%
  mutate(Group = dplyr::recode(Group, HomeStatus = "Home Status", AgeGroup = "Age", SubClass= "Class", SubFamClass = "Family Class"))

P_All_Party <- ggplot(MeansAllP.Bind, aes(x = Attribute, y=fit, col = Probabilities, shape = Probabilities)) + 
  geom_errorbar(aes(ymin=lwr, ymax=upr), width=.1, position= position_dodge(w=0.5)) +
  geom_point(position= position_dodge(w=0.5), size=2) +
  geom_hline(yintercept=50, linetype="dashed", 
             size=1, color="grey", alpha=0.5)+
  theme_classic()+
  ggtitle("Vote Conservative by Group") + theme(plot.title = element_text(hjust = 0.5)) +
  xlab("") + ylab("")+
  facet_grid(Group~., scales = "free_y", space = "free_y")+
  theme(legend.position="bottom", strip.text.y =element_text(angle = 0),
        panel.grid.major.y = element_line(colour = "grey95"),
        panel.background = element_rect(colour = "black"))+
  coord_flip(ylim= c(0, 100))+
  scale_color_manual(values = c("dodgerblue4","brown3", "dodgerblue3"))+
  scale_shape_manual(values = c(17, 16, 15))



```
```{r,include=FALSE}

#The following chunk allows for a comparison between BES and guesses, modelling all profile attributes together. Models are estaimted in logit and linear form. Linear model lead to invalid predictions for the binary vote choice for some profiles


# Brexit Linear model, predicting guessed probability of leave with porfiles attributes as predictors
GuessedLeave<- lm(profile_Leave ~ profile_Gender+profile_AgeGroup+profile_Education+profile_Region+profile_SubClass+profile_SubFamClass+profile_HomeStatus+profile_Religion+profile_Ethnicity+profile_AnnualHouseholdIncome, weights=Brexit.Experiment.Clean$respondent_weights, data = Brexit.Experiment.Clean)
BESLeave<- lm(profile_Leave ~ profile_Gender+profile_AgeGroup+profile_Education+profile_Region+profile_SubClass+profile_SubFamClass+profile_HomeStatus+profile_Religion+profile_Ethnicity+profile_AnnualHouseholdIncome, weights=Brexit.BES.Clean$profile_weights, data = Brexit.BES.Clean)

# Brexit Logit model
Brexit.Experiment.Clean$LeaveProp <- Brexit.Experiment.Clean$profile_Leave/100

#Brexit.Sep is a dataset created to ease later combination
Brexit.Sep<- Brexit.Experiment.Clean %>%
select(LeaveProp, starts_with("profile"), respondent_weights)
colnames(Brexit.Sep)<-paste0(colnames(Brexit.Sep), "_")

GuessedLeaveLogit0 <- glm("LeaveProp_ ~ profile_Gender_ +profile_AgeGroup_ +profile_Education_ +profile_Region_ +profile_SubClass_ +profile_SubFamClass_ +profile_HomeStatus_ +profile_Religion_ +profile_Ethnicity_ +profile_AnnualHouseholdIncome_", weights=Brexit.Sep$respondent_weights_, data = Brexit.Sep, family=quasibinomial(link="logit"))

# Same regression with Brexit.Experiment.Clean dataset (guesses model)
GuessedLeaveLogit<-glm(LeaveProp ~ profile_Gender +profile_AgeGroup +profile_Education +profile_Region +profile_SubClass +profile_SubFamClass +profile_HomeStatus +profile_Religion +profile_Ethnicity +profile_AnnualHouseholdIncome, weights=Brexit.Experiment.Clean$respondent_weights, data = Brexit.Experiment.Clean, family=quasibinomial(link="logit"))


# Same regression with Brexit.BES.Clean dataset (BES model)
Brexit.BES.Clean$Leave.BES.Prop <- Brexit.BES.Clean$profile_Leave/100

#Brexit.Sep.BES is a dataset created to ease later combination
Brexit.Sep.BES<- Brexit.BES.Clean %>%
select(Leave.BES.Prop, starts_with("profile"))
colnames(Brexit.Sep.BES)<-paste0(colnames(Brexit.Sep.BES), "_")

BESLeaveLogit0 <- glm("Leave.BES.Prop_ ~ profile_Gender_ +profile_AgeGroup_ +profile_Education_ +profile_Region_ +profile_SubClass_ +profile_SubFamClass_ +profile_HomeStatus_ +profile_Religion_ +profile_Ethnicity_ +profile_AnnualHouseholdIncome_", weights=Brexit.Sep.BES$profile_weights_, data = Brexit.Sep.BES, family=quasibinomial(link="logit"))

# Same regression with Brexit.BES.Clean dataset (BES model)
BESLeaveLogit <- glm(Leave.BES.Prop ~ profile_Gender +profile_AgeGroup +profile_Education +profile_Region +profile_SubClass +profile_SubFamClass +profile_HomeStatus +profile_Religion +profile_Ethnicity +profile_AnnualHouseholdIncome, weights=Brexit.BES.Clean$profile_weights, data = Brexit.BES.Clean, family=quasibinomial(link="logit"))

#Plotting guesses and BES model together

P.Mult.Leave0<-multiplot(GuessedLeaveLogit0,BESLeaveLogit0, title = "Predictors of Voting Leave: Guessed versus BES", xlab = "Logistic Coefficents",
          ylab = "", innerCI = 0, outerCI = 2, lwdInner = 1,
          lwdOuter = 0.5, pointSize = 2, dodgeHeight = 0.5, color = "blue",
          shape = 16, linetype = 1, cex = 0.8, textAngle = 0,
          numberAngle = 90, zeroColor = "grey", zeroLWD = 1, zeroType = 2,
          single = T, scales = "fixed", ncol = length(unique(modelCI$Model)),
          sort = c("natural", "normal", "magnitude", "size", "alphabetical"),
          decreasing = FALSE, names = c("Guesses","BES"), numeric = FALSE, fillColor = "grey",
          alpha = 1/2, horizontal = FALSE, factors = NULL, only = NULL,
          shorten = TRUE, intercept = F, interceptName = "(Intercept)",
          coefficients = NULL, predictors = NULL, strict = FALSE,
          newNames = NULL, plot = TRUE, drop = FALSE, by = c("Coefficient",
                                                             "Model"), plot.shapes = FALSE, plot.linetypes = FALSE,
          legend.position =  "bottom",
          secret.weapon = FALSE, legend.reverse = FALSE, trans = identity)

P.Mult.Leave0$data<- P.Mult.Leave0$data %>%
  mutate(Coefficient = str_remove(Coefficient, "profile_")) %>%
  separate(Coefficient, into = c("Group", "Coefficient"), sep = "_") 
P.Mult.Leave0$data<- P.Mult.Leave0$data %>%
  mutate(Coefficient = fct_reorder(Coefficient, Value))
P.Mult.Leave0$data<- P.Mult.Leave0$data %>%
  mutate(Coefficient = fct_relevel(Coefficient,"Unknown", "Between 5,200 and 15,599","Between 15,600 and 25,999", "Between 26,000 and 36,399", "Between 36,400 and 44,999",  "Between 45,000 and 59,999", "Between 60,000 and 99,999", "Greater than 100,000"))

P.Mult.Leave0$data<- P.Mult.Leave0$data %>%
  mutate(Group = dplyr::recode(Group, HomeStatus = "Home Status", AgeGroup = "Age", SubClass= "Class", SubFamClass = "Family Class", AnnualHouseholdIncome = "Income"))
  
P.Mult.Leave <-P.Mult.Leave0+
  theme_classic()+
  ggtitle("Voting Leave: Guessed versus BES") + theme(plot.title = element_text(hjust = 0.5)) +
  facet_grid(Group~., scales = "free_y", space = "free_y")+
  theme(legend.position="bottom", strip.text.y =element_text(angle = 0),
        panel.grid.major.y = element_line(colour = "grey95"),
        panel.background = element_rect(colour = "black"))+
  scale_color_manual(values = c( "dodgerblue4", "brown3"))+
  scale_shape_manual(values = c(17, 16))

# Parties Linear models
GuessedConservative <- lm(profile_Con ~ profile_Gender+profile_AgeGroup+profile_Education+profile_Region+profile_SubClass+profile_SubFamClass+profile_HomeStatus+profile_Religion+profile_Ethnicity+profile_AnnualHouseholdIncome ,weights = Parties.Experiment.Clean$respondent_weights,data = Parties.Experiment.Clean)
RealConservative<- lm(profile_Con ~ profile_Gender+profile_AgeGroup+profile_Education+profile_Region+profile_SubClass+profile_SubFamClass+profile_HomeStatus+profile_Religion+profile_Ethnicity+profile_AnnualHouseholdIncome, weights=Parties.BES.Clean$profile_weights, data = Parties.BES.Clean)

# Parties Logstics models
Parties.Experiment.Clean$ConProp <- Parties.Experiment.Clean$profile_Con/100

# Party.Sep dataset created to to ease combination later
Party.Sep<- Parties.Experiment.Clean %>%
select(ConProp, starts_with("profile"), respondent_weights)
colnames(Party.Sep)<-paste0(colnames(Party.Sep), "_")

GuessedConservativeLogit0 <- glm("ConProp_ ~ profile_Gender_ +profile_AgeGroup_ +profile_Education_ +profile_Region_ +profile_SubClass_ +profile_SubFamClass_ +profile_HomeStatus_ +profile_Religion_ +profile_Ethnicity_ +profile_AnnualHouseholdIncome_", weights=Party.Sep$respondent_weights_, data = Party.Sep, family=quasibinomial(link="logit"))

# Same regression with Parties.Experiment.Clean (guesses)
GuessedConservativeLogit <- glm(ConProp ~ profile_Gender +profile_AgeGroup  +profile_Education  +profile_Region  +profile_SubClass  +profile_SubFamClass  +profile_HomeStatus  +profile_Religion  +profile_Ethnicity  +profile_AnnualHouseholdIncome , weights=Parties.Experiment.Clean$respondent_weights, data = Parties.Experiment.Clean, family=quasibinomial(link="logit"))

#Same regression with BES model
Parties.BES.Clean$profile_Con.Prop <- Parties.BES.Clean$profile_Con/100
Party.Sep.BES<- Parties.BES.Clean %>%
select(starts_with("profile"))
colnames(Party.Sep.BES)<-paste0(colnames(Party.Sep.BES), "_")

RealConservativeLogit0 <- glm("profile_Con.Prop_ ~ profile_Gender_ +profile_AgeGroup_ +profile_Education_ +profile_Region_ +profile_SubClass_ +profile_SubFamClass_ +profile_HomeStatus_ +profile_Religion_ +profile_Ethnicity_ +profile_AnnualHouseholdIncome_", weights=Party.Sep.BES$profile_weights_, data = Party.Sep.BES, family=quasibinomial(link="logit"))

RealConservativeLogit <- glm(profile_Con.Prop ~ profile_Gender +profile_AgeGroup +profile_Education +profile_Region +profile_SubClass +profile_SubFamClass +profile_HomeStatus +profile_Religion +profile_Ethnicity +profile_AnnualHouseholdIncome, weights=Parties.BES.Clean$profile_weights_, data = Parties.BES.Clean, family=quasibinomial(link="logit"))

# Plotting BES and Guesses together

P.Mult.Con0<-multiplot(GuessedConservativeLogit0, RealConservativeLogit0, title = "Voting Conservative: Guessed versus BES", xlab = "Logistic Coefficents",
          ylab = "", innerCI = 0, outerCI = 2, lwdInner = 1,
          lwdOuter = 0.5, pointSize = 2, dodgeHeight = 0.5, color = "blue",
          shape = 16, linetype = 1, cex = 0.8, textAngle = 0,
          numberAngle = 90, zeroColor = "grey", zeroLWD = 1, zeroType = 2,
          single = TRUE, scales = "fixed", ncol = length(unique(modelCI$Model)),
          sort = c("natural", "normal", "magnitude", "size", "alphabetical"),
          decreasing = FALSE, names = c("Guesses","BES"), numeric = FALSE, fillColor = "grey",
          alpha = 1/2, horizontal = FALSE, factors = NULL, only = NULL,
          shorten = TRUE, intercept = F, interceptName = "(Intercept)",
          coefficients = NULL, predictors = NULL, strict = FALSE,
          newNames = NULL, plot = TRUE, drop = FALSE, by = c("Coefficient",
                                                             "Model"), plot.shapes = FALSE, plot.linetypes = FALSE,
          legend.position = "bottom",
          secret.weapon = FALSE, legend.reverse = FALSE, trans = identity)

P.Mult.Con0$data<- P.Mult.Con0$data %>%
  mutate(Coefficient = str_remove(Coefficient, "profile_")) %>%
  separate(Coefficient, into = c("Group", "Coefficient"), sep = "_") 
P.Mult.Con0$data<- P.Mult.Con0$data %>%
  mutate(Coefficient = fct_reorder(Coefficient, Value))
P.Mult.Con0$data<- P.Mult.Con0$data %>%
  mutate(Coefficient = fct_relevel(Coefficient,"Unknown", "Between 5,200 and 15,599","Between 15,600 and 25,999", "Between 26,000 and 36,399", "Between 36,400 and 44,999",  "Between 45,000 and 59,999", "Between 60,000 and 99,999", "Greater than 100,000"))

P.Mult.Con0$data<- P.Mult.Con0$data %>%
  mutate(Group = dplyr::recode(Group, HomeStatus = "Home Status", AgeGroup = "Age", SubClass= "Class", SubFamClass = "Family Class", AnnualHouseholdIncome = "Income"))


P.Mult.Con<-P.Mult.Con0+
    theme_classic()+
  ggtitle("Voting Conservative: Guessed versus BES") + theme(plot.title = element_text(hjust = 0.5)) +
  facet_grid(Group~., scales = "free_y", space = "free_y")+
  theme(legend.position="bottom", strip.text.y =element_text(angle = 0),
        panel.grid.major.y = element_line(colour = "grey95"),
        panel.background = element_rect(colour = "black"))+
  scale_color_manual(values = c( "dodgerblue4", "brown3"))+
  scale_shape_manual(values = c(17, 16))


```

```{r,include=FALSE}

#This chunk allows for comparisons between the predicted probabilities of profiles voting in a certain way, implied by the BES model and the Guesses model.



#Brexit experiment
fitted_BES_Brex<-predict(BESLeaveLogit, newdata=Brexit.BES.Clean, type="response")
predicted_Experiment_Brex <- predict(GuessedLeaveLogit , newdata=Brexit.BES.Clean,type="response")
fitted_BES_Brexit<-data.frame("profile"= c(1:1695), fitted_BES_Brex)
predicted_Experiment_Brexit<-data.frame("profile"= c(1:1695), predicted_Experiment_Brex)
Relation.Models.Brexit<-merge(fitted_BES_Brexit, predicted_Experiment_Brexit, by="profile")


leave.predicted.plot <- ggplot(Relation.Models.Brexit, aes(x=fitted_BES_Brex, y= predicted_Experiment_Brex)) +
  geom_point(alpha = 0.5, col = "steelblue2")+
  geom_abline(intercept = 0, slope = 1)+
  ylim(0,1)+ 
  xlim(0,1)+
  ggtitle("Brexit Experiment") + 
  ggplot2::ylab("Predicted Probability Leave - Experiment")+
  ggplot2::xlab("Predicted Probability Leave - BES")+
  theme_classic()

#Parties experiment
fitted_BES_Con<- predict(RealConservativeLogit, newdata=Parties.BES.Clean,type="response")
predicted_Experiment_Con <- predict(GuessedConservativeLogit, newdata=Parties.BES.Clean,type="response")

fitted_BES_Conservative<-data.frame("profile"= c(1:1362), fitted_BES_Con)
predicted_Experiment_Conservative<-data.frame("profile"= c(1:1362), predicted_Experiment_Con)


Relation.Models.Con<-merge(fitted_BES_Conservative, predicted_Experiment_Conservative, by="profile")



parties.predicted.plot <- ggplot(Relation.Models.Con, aes(x= fitted_BES_Con, y=predicted_Experiment_Con)) +
  geom_point( alpha = 0.5, col="steelblue2")+
  geom_abline(intercept = 0, slope = 1)+
  xlim(0,1)+
  ylim(0,1)+
  ggtitle("Party Experiment") + 
  ggplot2::ylab("Predicted Probability Conservative - Experiment")+
  ggplot2::xlab("Predicted Probability Conservative - BES")+
  theme_classic()

```

```{r,include=FALSE}



#Individual Brier Score Brexit
Brexit.Experiment.Clean$BrierScore<- ((Brexit.Experiment.Clean$profile_Leave/100)-Brexit.Experiment.Clean$profile_Leave.Real)^2

BrierScoreTotal<-weighted.mean(Brexit.Experiment.Clean$BrierScore, w=Brexit.Experiment.Clean$respondent_weights, na.rm = T)


#Individual Brier Score Parties
Parties.Experiment.Clean$BrierScore<- (Parties.Experiment.Clean$profile_Conservative.Real-(Parties.Experiment.Clean$profile_Con/100))^2

Parties.Experiment.Clean$BrierScoreBinary <-  ((0.5+0.5*sign((Parties.Experiment.Clean$profile_Con - 50)))-Parties.Experiment.Clean$profile_Conservative.Real)^2

BrierScoreTotalParties<-mean(Parties.Experiment.Clean$BrierScore, w=Parties.Experiment.Clean$respondent_weights, na.rm = T)

```


```{r,include=FALSE}

#Average Brier score

#Predicted probabilities for every profile, using BES model
fitted_BESCoef_BrexExp<-predict(BESLeaveLogit, newdata=Brexit.Experiment.Clean,type="response")
fitted_BESCoef_BrexExp<-data.frame(Brexit.Experiment.Clean, fitted_BESCoef_BrexExp)
fitted_BESCoef_PartyExp<-predict(RealConservativeLogit, newdata=Parties.Experiment.Clean,type="response")
fitted_BESCoef_PartyExp<-data.frame(Parties.Experiment.Clean, fitted_BESCoef_PartyExp)

fitted_BESCoef_BrexExp<-fitted_BESCoef_BrexExp %>%
dplyr::rename(fittedBrexExp = fitted_BESCoef_BrexExp)
fitted_BESCoef_PartyExp<-fitted_BESCoef_PartyExp %>%
  dplyr::rename(fittedPartyExp = fitted_BESCoef_PartyExp)

#Brier score Parties
fitted_BESCoef_PartyExp$BrierScore.fitted<- (fitted_BESCoef_PartyExp$fittedPartyExp-(fitted_BESCoef_PartyExp$profile_Con/100))^2
BrierScoreTotalParty.fitted<-mean(fitted_BESCoef_PartyExp$BrierScore.fitted, w=Parties.Experiment.Clean$respondent_weights, na.rm = T)

#Brier Score Brexit
fitted_BESCoef_BrexExp$BrierScore.fitted<- (fitted_BESCoef_BrexExp$fittedBrexExp-(fitted_BESCoef_BrexExp$profile_Leave/100))^2
BrierScoreTotalBrex.fitted<-mean(fitted_BESCoef_BrexExp$BrierScore.fitted, w=Parties.Experiment.Clean$respondent_weights, na.rm = T)

#Brier score by respondent characteristics


#Predicting Brier Score with politcal attention only. Political attention as factor variable
Brexit.Experiment.Clean$respondent_PoliticalAttention<-factor(Brexit.Experiment.Clean$respondent_PoliticalAttention,
                                                  levels=c("0"[order(1)],"1"[order(2)],
                                                           "2"[order(3)],"3"[order(4)],
                                                           "4"[order(5)],"5"[order(6)],
                                                           "6"[order(7)],"7"[order(8)],
                                                           "8"[order(9)],"9"[order(10)],
                                                           "10"[order(11)]))
BS_byAttention_Brexit2<-lm(BrierScore~respondent_PoliticalAttention, weights=Brexit.Experiment.Clean$respondent_weights, data = Brexit.Experiment.Clean)

new.dat <-data.frame(respondent_PoliticalAttention=c("0",  "1",  "2",  "3",
                                            "4",  "5",  "6",  "7",  
                                            "8",  "9", "10"))
BS_byAttention_Brexit2<-data.frame(new.dat, predict(BS_byAttention_Brexit2, newdata = new.dat, interval = 'confidence'))

BS_byAttention_Brexit2$respondent_PoliticalAttention=factor(BS_byAttention_Brexit2$respondent_PoliticalAttention,
                                                  levels=c("0"[order(1)],"1"[order(2)],
                                                           "2"[order(3)],"3"[order(4)],
                                                           "4"[order(5)],"5"[order(6)],
                                                           "6"[order(7)],"7"[order(8)],
                                                           "8"[order(9)],"9"[order(10)],
                                                           "10"[order(11)]))


brier.by.attention.leave <- ggplot(BS_byAttention_Brexit2, aes(x=respondent_PoliticalAttention, y=fit, group = 1)) + 
  geom_errorbar(aes(ymin=lwr, ymax=upr), width=.1) +
  #geom_line() +
  geom_point() +
  ggtitle("Brexit Experiment") + theme(plot.title = element_text(hjust = 0.5)) +
  xlab("Political Attention") + ylab("Brier Score")+
  scale_y_continuous(limits = c(0.2,0.5), breaks = seq(0.2,0.5, by = 0.05)) +
  theme_classic()

#Predicting Brier Score with all respondents' attributes

Brexit.Experiment.Clean$respondent_PoliticalAttention<-as.numeric(as.character(Brexit.Experiment.Clean$respondent_PoliticalAttention))

model.BscAllRespondentR.Brexit<-lm(BrierScore~respondent_PoliticalAttention+respondent_pastvote_2017+respondent_pastvote_EURef+respondent_Age+respondent_Education+respondent_Gender+respondent_Region, weights=Brexit.Experiment.Clean$respondent_weights, data = Brexit.Experiment.Clean)

BS_byAttention_Brexit<-lm(BrierScore~respondent_PoliticalAttention, weights=Brexit.Experiment.Clean$respondent_weights, data = Brexit.Experiment.Clean)






#Parties

#Brier score by respondent characteristics

#Predicting Brier Score with politcal attention only. Political attention as factor variable


Parties.Experiment.Clean$respondent_PoliticalAttention<-factor(Parties.Experiment.Clean$respondent_PoliticalAttention,
                                               levels=c("0"[order(1)],"1"[order(2)],
                                                        "2"[order(3)],"3"[order(4)],
                                                        "4"[order(5)],"5"[order(6)],
                                                        "6"[order(7)],"7"[order(8)],
                                                        "8"[order(9)],"9"[order(10)],
                                                        "10"[order(11)]))


BS_byAttention_Parties<-lm(BrierScore~respondent_PoliticalAttention, weights=Parties.Experiment.Clean$respondent_weights, data = Parties.Experiment.Clean)
new.dat <- data.frame(respondent_PoliticalAttention=c("0",  "1",  "2",  "3",
                                            "4",  "5",  "6",  "7",  
                                            "8",  "9", "10"))
BS_byAttention_Parties2<-data.frame(new.dat, predict(BS_byAttention_Parties, newdata = new.dat, interval = 'confidence'))
BS_byAttention_Parties2$respondent_PoliticalAttention <- factor(BS_byAttention_Parties2$respondent_PoliticalAttention,
                                                   levels=c("0"[order(1)],"1"[order(2)],
                                                            "2"[order(3)],"3"[order(4)],
                                                            "4"[order(5)],"5"[order(6)],
                                                            "6"[order(7)],"7"[order(8)],
                                                            "8"[order(9)],"9"[order(10)],
                                                            "10"[order(11)]))


brier.by.attention.con <- ggplot(BS_byAttention_Parties2, aes(x=respondent_PoliticalAttention, y=fit, group = 1)) + 
  geom_errorbar(aes(ymin=lwr, ymax=upr), width=.1) +
  #geom_line() +
  geom_point() +
  ggtitle("Party Experiment") + theme(plot.title = element_text(hjust = 0.5)) +
  xlab("Political Attention") + ylab("Brier Score")+
  scale_y_continuous(limits = c(0.2,0.5), breaks = seq(0.2,0.5, by = 0.05)) +
  theme_classic()

#Predicting Brier Score with all respondents' attributes

Parties.Experiment.Clean$respondent_PoliticalAttention<-as.numeric(as.character(Parties.Experiment.Clean$respondent_PoliticalAttention))
model.BscAllRespondentR.party<-lm(BrierScore~respondent_PoliticalAttention+respondent_pastvote_2017+respondent_pastvote_EURef+respondent_Age+respondent_Education+respondent_Gender+respondent_Region, weights=Parties.Experiment.Clean$respondent_weights, data = Parties.Experiment.Clean)



#Preparing data for dichotomous analysis in Parties experiment

fitted_BESCoef_BrexExp<- fitted_BESCoef_BrexExp %>%
  mutate(profile_Leave.Dich = case_when( profile_Leave<50 ~ "Remain",
                                         profile_Leave>50 ~ "Leave",
                                         profile_Leave==50 ~ "50-50"))
fitted_BESCoef_PartyExp<- fitted_BESCoef_PartyExp %>%
  mutate(profile_Con.Dich = case_when( profile_Con<50 ~ "Labour",
                                         profile_Con>50 ~ "Conservative",
                                         profile_Con==50 ~ "50-50"))

fitted_BESCoef_BrexExp <- fitted_BESCoef_BrexExp %>%
  mutate(profile_Leave.RealN = case_when(profile_Leave.Real == 1 ~ "Leave" , 
                                          profile_Leave.Real == 0 ~ "Remain"))

#Creating variable that equals 1 for correct dichotomous guess (and 0.5 for "50-50"). Brexit
fitted_BESCoef_BrexExp <- fitted_BESCoef_BrexExp %>%
  mutate(Correct.Dich.Guess.Brex = case_when(profile_Leave.Dich=="50-50" ~ 0.5,
                                             profile_Leave.Dich==profile_Leave.RealN ~ 1,
                                             profile_Leave.Dich!=profile_Leave.RealN ~ 0))

dclus1<-svydesign(ids=~0, weights=fitted_BESCoef_BrexExp$respondent_weights, data=fitted_BESCoef_BrexExp)
meanleave<-svymean(~Correct.Dich.Guess.Brex, dclus1 ,deff=F, na.rm =T)
CI<-confint(meanleave)
CI.Correct.Brex.Dich<-data.frame("Correct"="Brexit", "fit"=meanleave[1]*100, "lwr"=CI[1,1]*100,"upr"=CI[1,2]*100)

#Creating variable that equals 1 for correct dichotomous guess (and 0.5 for "50-50"). Parties
fitted_BESCoef_PartyExp <- fitted_BESCoef_PartyExp %>%
  mutate(profile_Conservative.RealN = case_when(profile_Conservative.Real == 1 ~ "Conservative" , 
                                          profile_Conservative.Real == 0 ~ "Labour"))
fitted_BESCoef_PartyExp <- fitted_BESCoef_PartyExp %>%
  mutate(Correct.Dich.Guess.Party = case_when(profile_Con.Dich=="50-50" ~ 0.5,
                                             profile_Con.Dich==profile_Conservative.RealN ~ 1,
                                             profile_Con.Dich!=profile_Conservative.RealN ~ 0))

dclus1<-svydesign(ids=~0, weights=fitted_BESCoef_PartyExp$respondent_weights, data=fitted_BESCoef_PartyExp)
meanCon<-svymean(~Correct.Dich.Guess.Party, dclus1 ,deff=F, na.rm =T)
CI<-confint(meanCon)
CI.Correct.Party.Dich<-data.frame("Correct"="Party", "fit"=meanCon[1]*100, "lwr"=CI[1,1]*100,"upr"=CI[1,2]*100)

#Comparing dichotomized guess and dichotomized BES model prediction

##Dichotomous guess versus profiles dichotomous fitted vote
fitted_BESCoef_BrexExp <- fitted_BESCoef_BrexExp %>%
  mutate(fittedBrexExp.Dich = case_when(fittedBrexExp==0.5 ~ "50-50",
                                        fittedBrexExp<0.5 ~ "Remain",
                                        fittedBrexExp>0.5 ~ "Leave"))
fitted_BESCoef_PartyExp <- fitted_BESCoef_PartyExp %>%
  mutate(fittedPartyExp.Dich = case_when(fittedPartyExp==0.5 ~ "50-50",
                                         fittedPartyExp<0.5 ~ "Labour",
                                         fittedPartyExp>0.5 ~ "Conservative"))

fitted_BESCoef_BrexExp <- fitted_BESCoef_BrexExp %>%
  mutate(Correct.Dich.Guess.Brex.Fitted= case_when(profile_Leave.Dich== "50-50" ~ 0.5,
                                       profile_Leave.Dich==fittedBrexExp.Dich ~ 1,
                                       profile_Leave.Dich!=fittedBrexExp.Dich ~ 0))

fitted_BESCoef_PartyExp <- fitted_BESCoef_PartyExp %>%
  mutate(Correct.Dich.Guess.Party.Fitted= case_when(profile_Con.Dich== "50-50" ~ 0.5,
                                       profile_Con.Dich==fittedPartyExp.Dich ~ 1,
                                       profile_Con.Dich!=fittedPartyExp.Dich ~ 0))

dclus1<-svydesign(ids=~0, weights=fitted_BESCoef_BrexExp$respondent_weights, data=fitted_BESCoef_BrexExp)
meanleave<-svymean(~Correct.Dich.Guess.Brex.Fitted, dclus1 ,deff=F, na.rm =T)
CI<-confint(meanleave)
CI.Correct.Brex.Dich.Fitted<-data.frame("Correct for fitted"="Brexit", "fit"=meanleave[1]*100, "lwr"=CI[1,1]*100,"upr"=CI[1,2]*100)

dclus1<-svydesign(ids=~0, weights=fitted_BESCoef_PartyExp$respondent_weights, data=fitted_BESCoef_PartyExp)
meanCon<-svymean(~Correct.Dich.Guess.Party.Fitted, dclus1 ,deff=F, na.rm =T)
CI<-confint(meanCon)
CI.Correct.Party.Dich.Fitted<-data.frame("Correct for fitted"="Party", "fit"=meanCon[1]*100, "lwr"=CI[1,1]*100,"upr"=CI[1,2]*100)


dclus1<-svydesign(ids=~0, weights=fitted_BESCoef_PartyExp$respondent_weights, data=fitted_BESCoef_PartyExp)
propleave<-svyciprop(~I(Correct.Dich.Guess.Party.Fitted==1),dclus1, method="lo", df=degf(dclus1))
CI.est<-data.frame("fit"=as.matrix(propleave)*100)
CI.low <-data.frame("lwr"=as.matrix(attr(propleave, "ci")[1])*100)
CI.high <-data.frame("upr"=as.matrix(attr(propleave, "ci")[2])*100)
CI.Correct.Party.Dich.Fitted<-data.frame("Correct for fitted"="Party", CI.est, CI.low,CI.high)

#Predicting dichotomized guess succes by respondents' characteristics
#Brexit

fitted_BESCoef_BrexExp <- fitted_BESCoef_BrexExp%>%
  rename( c( "Correct.Dich.Guess.Brex" ="Correct.Dich.Guess"))
fitted_BESCoef_PartyExp <- fitted_BESCoef_PartyExp %>%
  rename( c( "Correct.Dich.Guess.Party" ="Correct.Dich.Guess"))

fitted_BESCoef_BrexExp$respondent_PoliticalAttention<-as.numeric(as.character(fitted_BESCoef_BrexExp$respondent_PoliticalAttention))

model.Correct.Brexit.Dich<-lm(Correct.Dich.Guess~respondent_PoliticalAttention+respondent_pastvote_2017+respondent_pastvote_EURef+respondent_Age+respondent_Education+respondent_Gender+respondent_Region, weights=fitted_BESCoef_BrexExp$respondent_weights, data = fitted_BESCoef_BrexExp)

#Parties
fitted_BESCoef_PartyExp$respondent_PoliticalAttention<-as.numeric(as.character(fitted_BESCoef_PartyExp$respondent_PoliticalAttention))

model.Correct.Party.Dich<-lm(Correct.Dich.Guess~respondent_PoliticalAttention+respondent_pastvote_2017+respondent_pastvote_EURef+respondent_Age+respondent_Education+respondent_Gender+respondent_Region, weights=fitted_BESCoef_PartyExp$respondent_weights, data = fitted_BESCoef_PartyExp)
```

```{r,include=FALSE}

#Comparing fitted probabilities (BES) with guessed probabilities (not fitted guessed probabilities)
#Comparing predicted (fitted) probabilities



guess.by.fitted.brexit <- ggplot(fitted_BESCoef_BrexExp, aes(x=fittedBrexExp, y= profile_Leave)) + 
  geom_point(alpha=0.5, col = "steelblue2")+
  stat_smooth(se=F, method='lm', col = "black")+
  coord_cartesian (ylim=c(0,100), xlim=c(0,1))+
  ggtitle("Brexit Experiment") + 
  ggplot2::ylab("Guessed percentage")+
  ggplot2::xlab("Predicted Probability")+
  theme_classic()

#Parties experiment
model.prob.party<-lm(profile_Con~fittedPartyExp, data=fitted_BESCoef_PartyExp)
#tab_model(model.prob.party, show.se = TRUE, show.stat = FALSE, digits = 3, title="Relation guessed probabilities with fitted model")

guess.by.fitted.party <- ggplot(fitted_BESCoef_PartyExp, aes(x=fittedPartyExp, y= profile_Con)) +
  geom_point(alpha=0.5, col = "steelblue2")+
  stat_smooth(se=F, method='lm', col = "black")+
  coord_cartesian (ylim=c(0,100), xlim=c(0,1))+
  ggtitle("Party Experiment") + 
  ggplot2::ylab("Guessed percentage")+
  ggplot2::xlab("Predicted Probability")+
  theme_classic()


```

```{r,include=FALSE}
#Mahalabonis distance

#Prepating data for Mahalabonis distnace calculation. Creating dummies representing matching attribute of porfile and respondent

#Party

#Gender
Parties.Experiment.Clean<- Parties.Experiment.Clean %>%
  mutate(Match.Gender = ifelse(respondent_Gender == profile_Gender, 1, 0))
#Region
Parties.Experiment.Clean<- Parties.Experiment.Clean %>%
  mutate(respondent_Region = recode(respondent_Region,
                                    "East Midlands" = "the East Midlands",
                                    "East of England" = "the East of England",
                                    "Yorkshire and the Humber" = "Yorkshire & Humber"),
         respondent_Region = as.character(respondent_Region),
         profile_Region = as.character(profile_Region))
fitted_BESCoef_PartyExp<- fitted_BESCoef_PartyExp %>%
  mutate(respondent_Region = recode(respondent_Region,
                                    "East Midlands" = "the East Midlands",
                                    "East of England" = "the East of England",
                                    "Yorkshire and the Humber" = "Yorkshire & Humber"),
         respondent_Region = as.character(respondent_Region),
         profile_Region = as.character(profile_Region))

Parties.Experiment.Clean<- Parties.Experiment.Clean %>%
  mutate(Match.Region = ifelse(respondent_Region == profile_Region, 1, 0))
#Education
Parties.Experiment.Clean<- Parties.Experiment.Clean %>%
  mutate(Match.Education =
           case_when(respondent_Education ==  "Level 5 and above" & profile_Education == "Holds a university degree" ~ 1,
                     respondent_Education !=  "Level 5 and above" & profile_Education != "Holds a university degree" ~ 1,
                     TRUE ~ 0))
#Age
Parties.Experiment.Clean<- Parties.Experiment.Clean %>%
  mutate(Match.Age =
           ifelse(abs(profile_Age - respondent_Age) > 10, 0, 1))

#Ethnicity. What to do with unknown?

Parties.Experiment.Clean<- Parties.Experiment.Clean %>%
  mutate(respondent_Ethnicity = fct_collapse(respondent_Ethnicity,
    White = c("English / Welsh / Scottish / Northern Irish / British", "Irish", "Gypsy or Irish Traveller", "Any other White background"),
    Mixed = c("White and Black African", "White and Black Caribbean", "White and Asian", "Any other Mixed / Multiple ethnic background"),
    Asian = c("Indian", "Pakistani", "Bangladeshi", "Chinese", "Any other Asian background"),
    Black = c("African", "Caribbean", "Any other Black / African / Caribbean background"),
    Other = c("Any other ethnic group", "Arab"),
    Unknown = "Prefer not to say"))
Parties.Experiment.Clean<- Parties.Experiment.Clean %>%
  mutate(
    respondent_Ethnicity = as.character(respondent_Ethnicity),
    profile_Ethnicity =as.character(profile_Ethnicity)
  )
fitted_BESCoef_PartyExp<- fitted_BESCoef_PartyExp %>%
  mutate(respondent_Ethnicity = fct_collapse(respondent_Ethnicity,
    White = c("English / Welsh / Scottish / Northern Irish / British", "Irish", "Gypsy or Irish Traveller", "Any other White background"),
    Mixed = c("White and Black African", "White and Black Caribbean", "White and Asian", "Any other Mixed / Multiple ethnic background"),
    Asian = c("Indian", "Pakistani", "Bangladeshi", "Chinese", "Any other Asian background"),
    Black = c("African", "Caribbean", "Any other Black / African / Caribbean background"),
    Other = c("Any other ethnic group", "Arab"),
    Unknown = "Prefer not to say"))
Parties.Experiment.Clean<- Parties.Experiment.Clean %>%
  mutate(
    respondent_Ethnicity = as.character(respondent_Ethnicity),
    profile_Ethnicity =as.character(profile_Ethnicity)
  )

Parties.Experiment.Clean<- Parties.Experiment.Clean %>%
  mutate(Match.Ethnicity = ifelse(respondent_Ethnicity == profile_Ethnicity, 1, 0))

#Income (matches are not perfect)
levels(Parties.Experiment.Clean$profile_AnnualHouseholdIncome)
Parties.Experiment.Clean<- Parties.Experiment.Clean %>%
  mutate(respondent_AnnualHouseholdIncome = fct_collapse(respondent_AnnualHouseholdIncome,
     "Less than 15,599" = c("under Â£5,000 per year", "Â£5,000 to Â£9,999 per year","Â£10,000 to Â£14,999 per year"),
    "Between 15,600 and 25,999" = c("Â£15,000 to Â£19,999 per year", "Â£20,000 to Â£24,999 per year"),
    "Between 26,000 and 44,999" = c("Â£25,000 to Â£29,999 per year", "Â£30,000 to Â£34,999 per year", "Â£35,000 to Â£39,999 per year", "Â£40,000 to Â£44,999 per year"),
    "Between 45,000 and 99,999" = c("Â£45,000 to Â£49,999 per year",   "Â£50,000 to Â£59,999 per year", "Â£60,000 to Â£69,999 per year", "Â£70,000 to Â£99,999 per year"),
    "Greater than 100,000" = c("Â£100,000 to Â£149,999 per year", "Â£150,000 and over")),
    profile_AnnualHouseholdIncome = fct_collapse(profile_AnnualHouseholdIncome,
      "Less than 15,599" = c("Less than 5,199", "Between 5,200 and 15,599"),
      "Between 45,000 and 99,999" = c("Between 45,000 and 59,999", "Between 60,000 and 99,999"),
      "Between 26,000 and 44,999" = c("Between 26,000 and 36,399", "Between 36,400 and 44,999")))
Parties.Experiment.Clean<- Parties.Experiment.Clean %>%
  mutate(
    respondent_AnnualHouseholdIncome = as.character(respondent_AnnualHouseholdIncome),
    profile_AnnualHouseholdIncome = as.character(profile_AnnualHouseholdIncome))
Parties.Experiment.Clean<- Parties.Experiment.Clean %>%
  mutate(Match.Income = ifelse(respondent_AnnualHouseholdIncome == profile_AnnualHouseholdIncome, 1, 0))
fitted_BESCoef_PartyExp<- fitted_BESCoef_PartyExp %>%
  mutate(respondent_AnnualHouseholdIncome = fct_collapse(respondent_AnnualHouseholdIncome,
     "Less than 15,599" = c("under Â£5,000 per year", "Â£5,000 to Â£9,999 per year","Â£10,000 to Â£14,999 per year"),
    "Between 15,600 and 25,999" = c("Â£15,000 to Â£19,999 per year", "Â£20,000 to Â£24,999 per year"),
    "Between 26,000 and 44,999" = c("Â£25,000 to Â£29,999 per year", "Â£30,000 to Â£34,999 per year", "Â£35,000 to Â£39,999 per year", "Â£40,000 to Â£44,999 per year"),
    "Between 45,000 and 99,999" = c("Â£45,000 to Â£49,999 per year",   "Â£50,000 to Â£59,999 per year", "Â£60,000 to Â£69,999 per year", "Â£70,000 to Â£99,999 per year"),
    "Greater than 100,000" = c("Â£100,000 to Â£149,999 per year", "Â£150,000 and over")))

#covariance distribution of the treatment
Treatment.P <- Parties.Experiment.Clean %>%
  select(starts_with("Match")) 

#Mahalonobis distance
Dx <- as.matrix((1-Treatment.P)) # to flip to match = 0
Sx <- var(Dx, na.rm=T)
M.PartySqr <- matrix(0, 5064)
for(i in 1:5064){
M.PartySqr[i,1] <- t(Dx[i,]) %*% inv(Sx) %*% Dx[i,]
}
M.Party <- sqrt(M.PartySqr)
Parties.Experiment.Clean$mahalanobis2<-M.Party
#Brexit
#Matching matrices for the respondents to our experiment and the treatment profiles


#Gender
Brexit.Experiment.Clean<- Brexit.Experiment.Clean %>%
  mutate(Match.Gender = ifelse(respondent_Gender == profile_Gender, 1, 0))
#Region
Brexit.Experiment.Clean<- Brexit.Experiment.Clean %>%
  mutate(respondent_Region = recode(respondent_Region,
                                    "East Midlands" = "the East Midlands",
                                    "East of England" = "the East of England",
                                    "Yorkshire and the Humber" = "Yorkshire & Humber"),
         respondent_Region = as.character(respondent_Region),
         profile_Region = as.character(profile_Region))
Brexit.Experiment.Clean<- Brexit.Experiment.Clean %>%
  mutate(Match.Region = ifelse(respondent_Region == profile_Region, 1, 0))
fitted_BESCoef_BrexExp<- fitted_BESCoef_BrexExp %>%
  mutate(respondent_Region = recode(respondent_Region,
                                    "East Midlands" = "the East Midlands",
                                    "East of England" = "the East of England",
                                    "Yorkshire and the Humber" = "Yorkshire & Humber"),
         respondent_Region = as.character(respondent_Region),
         profile_Region = as.character(profile_Region))


#Education
Brexit.Experiment.Clean<- Brexit.Experiment.Clean %>%
  mutate(Match.Education =
           case_when(respondent_Education ==  "Level 5 and above" & profile_Education == "Holds a university degree" ~ 1,
                     respondent_Education !=  "Level 5 and above" & profile_Education != "Holds a university degree" ~ 1,
                     TRUE ~ 0))
#Age
Brexit.Experiment.Clean<- Brexit.Experiment.Clean %>%
  mutate(Match.Age =
           ifelse(abs(profile_Age - respondent_Age) > 10, 0, 1))

#Ethnicity. What to do with unknown?

Brexit.Experiment.Clean<- Brexit.Experiment.Clean %>%
  mutate(respondent_Ethnicity = fct_collapse(respondent_Ethnicity,
                                             White = c("English / Welsh / Scottish / Northern Irish / British", "Irish", "Gypsy or Irish Traveller", "Any other White background"),
                                             Mixed = c("White and Black African", "White and Black Caribbean", "White and Asian", "Any other Mixed / Multiple ethnic background"),
                                             Asian = c("Indian", "Pakistani", "Bangladeshi", "Chinese", "Any other Asian background"),
                                             Black = c("African", "Caribbean", "Any other Black / African / Caribbean background"),
                                             Other = c("Any other ethnic group", "Arab"),
                                             Unknown = "Prefer not to say"))
Brexit.Experiment.Clean<- Brexit.Experiment.Clean %>%
  mutate(
    respondent_Ethnicity = as.character(respondent_Ethnicity),
    profile_Ethnicity =as.character(profile_Ethnicity)
  )
Brexit.Experiment.Clean<- Brexit.Experiment.Clean %>%
  mutate(Match.Ethnicity = ifelse(respondent_Ethnicity == profile_Ethnicity, 1, 0))

fitted_BESCoef_BrexExp<- fitted_BESCoef_BrexExp %>%
  mutate(respondent_Ethnicity = fct_collapse(respondent_Ethnicity,
                                             White = c("English / Welsh / Scottish / Northern Irish / British", "Irish", "Gypsy or Irish Traveller", "Any other White background"),
                                             Mixed = c("White and Black African", "White and Black Caribbean", "White and Asian", "Any other Mixed / Multiple ethnic background"),
                                             Asian = c("Indian", "Pakistani", "Bangladeshi", "Chinese", "Any other Asian background"),
                                             Black = c("African", "Caribbean", "Any other Black / African / Caribbean background"),
                                             Other = c("Any other ethnic group", "Arab"),
                                             Unknown = "Prefer not to say"))


#Income (matches are not perfect)
levels(Brexit.Experiment.Clean$profile_AnnualHouseholdIncome)
Brexit.Experiment.Clean<- Brexit.Experiment.Clean %>%
  mutate(respondent_AnnualHouseholdIncome = fct_collapse(respondent_AnnualHouseholdIncome,
                                                         "Less than 15,599" = c("under Â£5,000 per year", "Â£5,000 to Â£9,999 per year","Â£10,000 to Â£14,999 per year"),
                                                         "Between 15,600 and 25,999" = c("Â£15,000 to Â£19,999 per year", "Â£20,000 to Â£24,999 per year"),
                                                         "Between 26,000 and 44,999" = c("Â£25,000 to Â£29,999 per year", "Â£30,000 to Â£34,999 per year", "Â£35,000 to Â£39,999 per year", "Â£40,000 to Â£44,999 per year"),
                                                         "Between 45,000 and 99,999" = c("Â£45,000 to Â£49,999 per year",   "Â£50,000 to Â£59,999 per year", "Â£60,000 to Â£69,999 per year", "Â£70,000 to Â£99,999 per year"),
                                                         "Greater than 100,000" = c("Â£100,000 to Â£149,999 per year", "Â£150,000 and over")),
         profile_AnnualHouseholdIncome = fct_collapse(profile_AnnualHouseholdIncome,
                                                      "Less than 15,599" = c("Less than 5,199", "Between 5,200 and 15,599"),
                                                      "Between 45,000 and 99,999" = c("Between 45,000 and 59,999", "Between 60,000 and 99,999"),
                                                      "Between 26,000 and 44,999" = c("Between 26,000 and 36,399", "Between 36,400 and 44,999")))
Brexit.Experiment.Clean<- Brexit.Experiment.Clean %>%
  mutate(
    respondent_AnnualHouseholdIncome = as.character(respondent_AnnualHouseholdIncome),
    profile_AnnualHouseholdIncome = as.character(profile_AnnualHouseholdIncome))
Brexit.Experiment.Clean<- Brexit.Experiment.Clean %>%
  mutate(Match.Income = ifelse(respondent_AnnualHouseholdIncome == profile_AnnualHouseholdIncome, 1, 0))

fitted_BESCoef_BrexExp<- fitted_BESCoef_BrexExp %>%
  mutate(respondent_AnnualHouseholdIncome = fct_collapse(respondent_AnnualHouseholdIncome,
                                                         "Less than 15,599" = c("under Â£5,000 per year", "Â£5,000 to Â£9,999 per year","Â£10,000 to Â£14,999 per year"),
                                                         "Between 15,600 and 25,999" = c("Â£15,000 to Â£19,999 per year", "Â£20,000 to Â£24,999 per year"),
                                                         "Between 26,000 and 44,999" = c("Â£25,000 to Â£29,999 per year", "Â£30,000 to Â£34,999 per year", "Â£35,000 to Â£39,999 per year", "Â£40,000 to Â£44,999 per year"),
                                                         "Between 45,000 and 99,999" = c("Â£45,000 to Â£49,999 per year",   "Â£50,000 to Â£59,999 per year", "Â£60,000 to Â£69,999 per year", "Â£70,000 to Â£99,999 per year"),
                                                         "Greater than 100,000" = c("Â£100,000 to Â£149,999 per year", "Â£150,000 and over")))



#covariance distribution of the treatment
Treatment.B <- Brexit.Experiment.Clean %>%
  select(starts_with("Match")) 
mahalanobisB<-data.frame("mahalanobis" = mahalanobis(Treatment.B,center=F,var(Treatment.B, na.rm=T)))
Brexit.Experiment.Clean<-bind_cols(Brexit.Experiment.Clean, mahalanobisB)

#Mahalonobis distance

Dx <- as.matrix((1-Treatment.B)) # to flip to match = 0
Sx <- var(Dx, na.rm=T)
M.BrexitSqr <- matrix(0, 5082)
for(i in 1:5082){
  M.BrexitSqr[i,1] <- t(Dx[i,]) %*% inv(Sx) %*% Dx[i,]
}
M.Brexit <- sqrt( M.BrexitSqr)
Brexit.Experiment.Clean$mahalanobis2<-M.Brexit


#Using Mahalanobis distance to predict brier score and dichotomous guess

#Brexit
#Brier Score
model.Brier.MahalB<-lm(BrierScore~mahalanobis2, weights=Brexit.Experiment.Clean$respondent_weights, data = Brexit.Experiment.Clean)
model.Brier.Mahal.AllB<-lm(BrierScore~mahalanobis2 +respondent_PoliticalAttention+respondent_pastvote_2017+respondent_pastvote_EURef+respondent_Age+respondent_Education+respondent_Gender+respondent_Region+ respondent_Ethnicity, weights=Brexit.Experiment.Clean$respondent_weights, data = Brexit.Experiment.Clean)

#Dichotomous guess
fitted_BESCoef_BrexExp$mahalanobis2<- M.Brexit


model.Dich.Mahal.B <- lm(Correct.Dich.Guess~mahalanobis2, weights=fitted_BESCoef_BrexExp$respondent_weights, data = fitted_BESCoef_BrexExp)
model.Dich.Mahal.AllB <- lm(Correct.Dich.Guess~mahalanobis2+ respondent_PoliticalAttention+respondent_pastvote_2017+respondent_pastvote_EURef+respondent_Age+respondent_Education+respondent_Gender+respondent_Region + respondent_Ethnicity, weights=fitted_BESCoef_BrexExp$respondent_weights, data = fitted_BESCoef_BrexExp)

#Party
#Brier Score
model.Brier.MahalP<-lm(BrierScore~mahalanobis2, weights=Parties.Experiment.Clean$respondent_weights, data = Parties.Experiment.Clean)
model.Brier.Mahal.AllP<-lm(BrierScore~mahalanobis2 +respondent_PoliticalAttention+respondent_pastvote_2017+respondent_pastvote_EURef+respondent_Age+respondent_Education+respondent_Gender+respondent_Region + respondent_Ethnicity, weights=Parties.Experiment.Clean$respondent_weights, data = Parties.Experiment.Clean)

#Dichotomous guess
fitted_BESCoef_PartyExp$mahalanobis2<- M.Party

model.Dich.Mahal.P <- lm(Correct.Dich.Guess~mahalanobis2, weights=fitted_BESCoef_PartyExp$respondent_weights, data = fitted_BESCoef_PartyExp)
model.Dich.Mahal.AllP <- lm(Correct.Dich.Guess~mahalanobis2+ respondent_PoliticalAttention+respondent_pastvote_2017+respondent_pastvote_EURef+respondent_Age+respondent_Education+respondent_Gender+respondent_Region + respondent_Ethnicity, weights=fitted_BESCoef_PartyExp$respondent_weights, data = fitted_BESCoef_PartyExp)

```

```{r,echo=FALSE}
Parties_NoImputation <- read.csv("Parties_No_Imputation.csv",
                                 na.strings= "")
Brexit_NoImputation <- read.csv("EU_No_Imputation.csv",
                                na.strings= "")

Parties_NoImputation <- Parties_NoImputation %>%
  select(-finalserialno, -serial, -b02, -vote_2017, -weight_electorate) %>%
  dplyr::rename("Subjective Class" = "Sub_Class", "Subjective Family Class" = "Sub_Fam_Clas", "Home Status" = "Home_status")
Brexit_NoImputation <- Brexit_NoImputation %>%
  select(-finalserialno, -serial, -EU_2016, -weight_electorate)%>%
  dplyr::rename("Subjective Class" = Sub_Class, "Subjective Family Class" = Sub_Fam_Clas, "Home Status" = Home_status)
  
P.Brexit_Miss <- vis_miss(Brexit_NoImputation)
P.Parties_Miss <- vis_miss(Parties_NoImputation)
```


# Additional Tables and Figures


```{r, fig.width = 8, fig.height = 6, out.height="50%",out.width="\\linewidth",echo=FALSE,fig.cap="Missing values in profiles before imputation for Brexit experiment (top) and Parties experiment (bottom). \\label{Missingness}",fig.show='hold',fig.align='center', fig.pos="H"}
grid.arrange(P.Brexit_Miss,P.Parties_Miss, ncol=1, nrow=2)

```


```{r, fig.width = 8, fig.height = 6, out.height="50%",out.width="\\linewidth",echo=FALSE,fig.cap="Average guessed percentages for respondents grouped by their own referendum vote (top) and general election vote (bottom). \\label{guesses_by_past_vote}",fig.show='hold',fig.align='center', fig.pos="H"}
grid.arrange(guess.by.Brexit.vote,guess.by.party.vote, ncol=1, nrow=2)
```


\singlespacing <!-- for below table -->

```{r, echo = FALSE, results='asis', out.width="97%", fig.show='hold'}

texreg(list(model.Brier.MahalB, model.Brier.MahalP, model.Dich.Mahal.B, model.Dich.Mahal.P), 
       float.pos="b",
       single.row=FALSE,
       digits = 3,
       fontsize = "footnotesize",
       stars = c(0.01, 0.05, 0.1),
       longtable=FALSE,
       use.packages=FALSE,
       caption="Coefficient Estimates for a Regression Model for Brier Score and Correct Dichotomized Guess by Mahalonobis Distance \\label{Mahalonobisregression}",
       custom.header= list("Brier Score" = 1:2, "Correct Dichotomized Guess" = 3:4),
       custom.model.names =c("Brexit Exp.","Party Exp.", "Brexit Exp.", "Party Exp."),
       custom.coef.names= c("Intercept","Mahalonobis Distance"))
```

\doublespacing

